# first thing to do is importing the data
# install.packages("readxl")
# readxl::read_excel("data_dec/water_stress.xlsx")
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
After importing the data and give it the name of d0, we are going to visualize it, by creation different plots.
##Visualization of data with plots. Create many plots.
X= Date Y= Variable Y1= Plant height Y2= Leaf number Y3= Leaf lenght Y4= Leaf width Y5= Leaf area Y7= Chlorophyll
# install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)
# first we are going to visualize all the species in a plot
ggplot(d0, aes(x= Date, y= Plant_height, group= PlantId, color= Treatment)) +
geom_line()+
facet_grid(Species ~. )
## Warning: Removed 3 rows containing missing values (`geom_line()`).
In this code chunk we are trying to visualize one species, to know how to plot it first.
# For Solanum lycopersicum
s1 <- d0[d0$Species == "Solanum lycopersicum", ]
ggplot(s1, aes(x = Date, y = Plant_height, group = PlantId, color = Treatment)) +
geom_line()
Then we will create a for loop to visualize all the species and all the variables.
v1 <- c("Plant_height", "Leaf_width", "Leaf_length", "Leaf_area", "Leaf_number", "Root_length", "Chlorophyll_content", "Soil_humidity", "Electrical_conductivity")
i <- "Beta vulgaris"
variable <- "Plant_height"
for(i in levels(as.factor(d0$Species))) {
for(variable in v1) {
s1 <- d0[d0$Species==i, c(variable, "Week", "PlantId", "Treatment")]
s1 <- na.exclude(s1)
p <- ggplot(s1, aes(x= Week, y= .data[[variable]], group= PlantId, color= Treatment)) +
geom_line() +
labs(title = i)
print(p)
}
}
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
## Chapter 2 - ANOVA
in this part we are trying to do an ANOVA analysis for the data
```r
#Packages
#install.packages(c("ggplot2", "ggpubr", "tidyverse", "broom", "AICcmodavg"), , dependencies = TRUE)
library(ggplot2)
library(ggpubr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ✔ purrr 0.3.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(broom)
#Import data
d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
d0
## # A tibble: 1,260 × 23
## Group Week Date Species PlantId Use Treat…¹ Soil_…² Elect…³
## <chr> <chr> <dttm> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 G1 W1 2022-10-05 00:00:00 Solanu… Slc1 cult… c 55.6 1.31
## 2 G1 W1 2022-10-05 00:00:00 Solanu… Slc2 cult… c 52.7 1.28
## 3 G1 W1 2022-10-05 00:00:00 Solanu… Slc3 cult… c 61.3 1.24
## 4 G1 W1 2022-10-05 00:00:00 Solanu… Slc4 cult… c 51 1.36
## 5 G1 W1 2022-10-05 00:00:00 Solanu… Slc5 cult… c 52.1 1.39
## 6 G1 W1 2022-10-05 00:00:00 Solanu… Slc6 cult… c 54.4 1.24
## 7 G1 W1 2022-10-05 00:00:00 Solanu… Slc7 cult… c 51.6 1.51
## 8 G1 W1 2022-10-05 00:00:00 Amaran… Arc1 wild c 54.6 1.68
## 9 G1 W1 2022-10-05 00:00:00 Amaran… Arc2 wild c 50.9 1.93
## 10 G1 W1 2022-10-05 00:00:00 Amaran… Arc3 wild c 67.2 1.89
## # … with 1,250 more rows, 14 more variables: Too_dry <chr>, Plant_height <dbl>,
## # Leaf_number <dbl>, Leaf_length <dbl>, Leaf_width <dbl>, Leaf_area <dbl>,
## # Chlorophyll_content <dbl>, Aerial_fresh_weight <dbl>,
## # Aerial_dry_weight <dbl>, Root_length <dbl>, Roots_fresh_weight <dbl>,
## # Roots_dry_weight <dbl>, Mortality <chr>, Comments <lgl>, and abbreviated
## # variable names ¹​Treatment, ²​Soil_humidity, ³​Electrical_conductivity
names(d0)
## [1] "Group" "Week"
## [3] "Date" "Species"
## [5] "PlantId" "Use"
## [7] "Treatment" "Soil_humidity"
## [9] "Electrical_conductivity" "Too_dry"
## [11] "Plant_height" "Leaf_number"
## [13] "Leaf_length" "Leaf_width"
## [15] "Leaf_area" "Chlorophyll_content"
## [17] "Aerial_fresh_weight" "Aerial_dry_weight"
## [19] "Root_length" "Roots_fresh_weight"
## [21] "Roots_dry_weight" "Mortality"
## [23] "Comments"
Most visual difference is in week 6 (w6)
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Plant_height")]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Plant_height ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Plant_height
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 466 232.8 7.5815 0.000672 ***
## Species 8 59922 7490.3 243.9062 < 2.2e-16 ***
## Residuals 198 6081 30.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Plant_height ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.3858 -2.9645 -0.2497 2.4476 21.2022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.9645 1.0121 14.785 < 2e-16 ***
## Treatmenti -0.2671 0.9367 -0.285 0.775794
## Treatments -3.4121 0.9402 -3.629 0.000362 ***
## SpeciesBeta vulgaris 3.9026 1.5058 2.592 0.010261 *
## SpeciesHordeum vulgare 37.1571 1.4811 25.088 < 2e-16 ***
## SpeciesLolium perenne 26.6762 1.4811 18.011 < 2e-16 ***
## SpeciesPortulaca oleracea -7.0476 1.4811 -4.758 3.76e-06 ***
## SpeciesRaphanus sativus 6.0476 1.4811 4.083 6.44e-05 ***
## SpeciesSolanum lycopersicum 44.8333 1.4811 30.271 < 2e-16 ***
## SpeciesSonchus oleraceus 4.4619 1.4811 3.013 0.002928 **
## SpeciesSpinacia oleracea 0.9381 1.4811 0.633 0.527208
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.542 on 198 degrees of freedom
## Multiple R-squared: 0.9085, Adjusted R-squared: 0.9039
## F-statistic: 196.6 on 10 and 198 DF, p-value: < 2.2e-16
ggplot(d1, aes(x = Treatment, y = Plant_height, fill = Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_number
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 13.74 6.871 4.1003 0.01799 *
## Species 8 618.29 77.286 46.1175 < 2e-16 ***
## Residuals 199 333.50 1.676
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_number") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_number ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_number
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 304.1 152.06 15.356 6.315e-07 ***
## Species 8 3971.2 496.40 50.128 < 2.2e-16 ***
## Residuals 198 1960.7 9.90
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_number, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 2.0 0.99 0.5525 0.5764
## Species 8 5750.9 718.87 402.4626 <2e-16 ***
## Residuals 199 355.4 1.79
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 79.8 39.9 6.1425 0.002581 **
## Species 8 30954.5 3869.3 595.6664 < 2.2e-16 ***
## Residuals 198 1286.2 6.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y= Leaf_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_width
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 3.62 1.809 5.1477 0.006612 **
## Species 8 704.03 88.004 250.4667 < 2.2e-16 ***
## Residuals 199 69.92 0.351
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_width") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_width ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_width
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 30.0 15.00 11.645 1.654e-05 ***
## Species 8 5014.3 626.78 486.695 < 2.2e-16 ***
## Residuals 198 255.0 1.29
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_width, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W1" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_area
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 128.4 64.21 4.3722 0.01386 *
## Species 8 11029.3 1378.66 93.8798 < 2e-16 ***
## Residuals 199 2922.4 14.69
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Leaf_area") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Leaf_area ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Leaf_area
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 7957 3978.3 29.755 5.025e-12 ***
## Species 8 153005 19125.7 143.048 < 2.2e-16 ***
## Residuals 198 26473 133.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Leaf_area, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W3" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 107.26 53.630 6.5679 0.002311 **
## Species 3 298.82 99.607 12.1985 1.258e-06 ***
## Residuals 78 636.91 8.166
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
###### Week 4
d1 <- d0[d0$Week == "W4" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 15.56 7.780 0.8133 0.4459
## Species 5 533.87 106.774 11.1623 8.283e-09 ***
## Residuals 117 1119.17 9.566
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
###### Week 5
d1 <- d0[d0$Week == "W5" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 23.87 11.94 0.8801 0.4168
## Species 6 2018.13 336.36 24.7988 <2e-16 ***
## Residuals 158 2143.01 13.56
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
###### Week 6
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Chlorophyll_content") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Chlorophyll_content ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Chlorophyll_content
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 20.7 10.36 0.5908 0.5551
## Species 6 4832.8 805.47 45.9083 <2e-16 ***
## Residuals 158 2772.1 17.55
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
ggplot(d1, aes(x= Treatment, y=Chlorophyll_content, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_fresh_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Aerial_fresh_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Aerial_fresh_weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 1676.5 838.27 62.861 < 2.2e-16 ***
## Species 8 5586.5 698.32 52.366 < 2.2e-16 ***
## Residuals 199 2653.7 13.34
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Aerial_fresh_weight ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8478 -2.7565 -0.1866 2.4259 12.7100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4190 0.6667 6.628 3.12e-10 ***
## Treatmenti -3.8374 0.6173 -6.217 2.92e-09 ***
## Treatments -6.9069 0.6173 -11.190 < 2e-16 ***
## SpeciesBeta vulgaris 10.7119 0.9760 10.976 < 2e-16 ***
## SpeciesHordeum vulgare 5.6719 0.9760 5.812 2.43e-08 ***
## SpeciesLolium perenne 1.0219 0.9760 1.047 0.296342
## SpeciesPortulaca oleracea 0.6705 0.9760 0.687 0.492895
## SpeciesRaphanus sativus 8.0210 0.9760 8.218 2.61e-14 ***
## SpeciesSolanum lycopersicum 15.2586 0.9760 15.634 < 2e-16 ***
## SpeciesSonchus oleraceus 10.9362 0.9760 11.205 < 2e-16 ***
## SpeciesSpinacia oleracea 3.5290 0.9760 3.616 0.000379 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.652 on 199 degrees of freedom
## Multiple R-squared: 0.7324, Adjusted R-squared: 0.719
## F-statistic: 54.46 on 10 and 199 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_fresh_weight, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Aerial_dry_weight") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Aerial_dry_weight ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Aerial_dry_weight
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 2.501 1.2503 16.477 2.488e-07 ***
## Species 8 53.289 6.6611 87.784 < 2.2e-16 ***
## Residuals 192 14.569 0.0759
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Aerial_dry_weight ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.80832 -0.13388 -0.04531 0.14231 1.37768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.204034 0.050646 4.029 8.08e-05 ***
## Treatmenti -0.087659 0.047782 -1.835 0.0681 .
## Treatments -0.291585 0.047327 -6.161 4.16e-09 ***
## SpeciesBeta vulgaris 0.649136 0.077647 8.360 1.25e-14 ***
## SpeciesHordeum vulgare 0.612857 0.073621 8.325 1.56e-14 ***
## SpeciesLolium perenne 0.080000 0.073621 1.087 0.2786
## SpeciesPortulaca oleracea -0.004762 0.073621 -0.065 0.9485
## SpeciesRaphanus sativus 0.801429 0.073621 10.886 < 2e-16 ***
## SpeciesSolanum lycopersicum 1.464286 0.073621 19.890 < 2e-16 ***
## SpeciesSonchus oleraceus 1.228283 0.079249 15.499 < 2e-16 ***
## SpeciesSpinacia oleracea 0.140000 0.073621 1.902 0.0587 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2755 on 192 degrees of freedom
## Multiple R-squared: 0.7929, Adjusted R-squared: 0.7821
## F-statistic: 73.52 on 10 and 192 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Aerial_dry_weight, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
d1 <- d0[d0$Week == "W6" , c("Treatment", "Species", "Root_length") ]
d1 <- na.exclude(d1) #remove the ones with empty cells
lm1 <- lm(Root_length ~ Treatment + Species, data=d1)
anova(lm1)
## Analysis of Variance Table
##
## Response: Root_length
## Df Sum Sq Mean Sq F value Pr(>F)
## Treatment 2 38.4 19.19 0.5121 0.6
## Species 8 19277.3 2409.66 64.2873 <2e-16 ***
## Residuals 199 7459.1 37.48
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm1)
##
## Call:
## lm(formula = Root_length ~ Treatment + Species, data = d1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.2830 -2.8891 -0.4475 1.9244 27.2872
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1226 1.1178 3.688 0.000291 ***
## Treatmenti -0.3986 1.0349 -0.385 0.700500
## Treatments 0.6394 1.0349 0.618 0.537392
## SpeciesBeta vulgaris 16.0534 1.6363 9.811 < 2e-16 ***
## SpeciesHordeum vulgare 20.5303 1.6363 12.547 < 2e-16 ***
## SpeciesLolium perenne 10.8008 1.6363 6.601 3.63e-10 ***
## SpeciesPortulaca oleracea 0.2543 1.6363 0.155 0.876635
## SpeciesRaphanus sativus 23.3591 1.6363 14.276 < 2e-16 ***
## SpeciesSolanum lycopersicum 20.8115 1.6363 12.719 < 2e-16 ***
## SpeciesSonchus oleraceus 23.0820 1.6363 14.107 < 2e-16 ***
## SpeciesSpinacia oleracea 3.5060 1.6363 2.143 0.033355 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.122 on 199 degrees of freedom
## Multiple R-squared: 0.7214, Adjusted R-squared: 0.7074
## F-statistic: 51.53 on 10 and 199 DF, p-value: < 2.2e-16
ggplot(d1, aes(x= Treatment, y=Root_length, fill= Species)) +
geom_boxplot() +
facet_grid(. ~ Species) +
theme(strip.text.x = element_text(size=0))
in this part we are going to do a PCA analysis for the data
#importing data
#we already imported the data in the previous parts, that's why the functions of importing the data are commented
#readxl::read_excel("data_dec/water_stress.xlsx")
#d0 <- readxl::read_excel("data_dec/water_stress.xlsx", sheet = "Data")
Next we need to make a subset for the data with only numerical variables and exclude the other variables, scale the data and after that we will exclude all the missing data
PCA_data <- d0[c(8:9, 11:21)]
PCA_data_scaled <- as.data.frame(scale(PCA_data))
view(PCA_data_scaled)
# exclude missing values NA
PCA_data01 <- na.exclude(PCA_data_scaled)
now we are going to do a PCA of the data
PCA <- prcomp(PCA_data01, scale = FALSE)
print(PCA)
## Standard deviations (1, .., p=13):
## [1] 2.64322013 1.42965415 1.25678058 1.15025582 0.60623801 0.45306262
## [7] 0.27287074 0.23294676 0.21466349 0.18793117 0.16714952 0.12251860
## [13] 0.04945087
##
## Rotation (n x k) = (13 x 13):
## PC1 PC2 PC3 PC4
## Soil_humidity -0.004484399 0.049697147 -0.11723720 -0.248423655
## Electrical_conductivity -0.008044875 -0.006001977 -0.01643564 -0.037264645
## Plant_height -0.317876839 -0.019322001 -0.14588964 -0.383332098
## Leaf_number 0.191695542 -0.636215879 0.64619422 -0.354302767
## Leaf_length -0.204597818 -0.120259300 -0.25419740 -0.067658522
## Leaf_width -0.403324908 -0.113622120 -0.12480722 -0.279492301
## Leaf_area -0.460454617 -0.066851259 0.02718646 -0.113601184
## Chlorophyll_content -0.028883692 -0.719939012 -0.40819495 0.500269950
## Aerial_fresh_weight -0.354084459 -0.114177787 0.04662476 0.006153558
## Aerial_dry_weight -0.308108066 -0.059717159 0.04332706 -0.089751120
## Root_length -0.231145679 -0.001854812 0.10408313 0.102712787
## Roots_fresh_weight -0.362604484 0.150905948 0.49832289 0.524473335
## Roots_dry_weight -0.198800432 0.053083675 0.19070678 0.157603575
## PC5 PC6 PC7 PC8
## Soil_humidity 0.81679444 0.28302620 0.10935536 0.15755331
## Electrical_conductivity 0.36647667 0.21907930 -0.16776733 -0.28746814
## Plant_height -0.01464742 -0.29041881 -0.33800754 -0.39856999
## Leaf_number 0.04191206 -0.05574685 -0.04328160 0.03493110
## Leaf_length 0.06291150 -0.23212135 -0.20135610 0.18225842
## Leaf_width 0.04636750 -0.30319480 -0.03303809 0.05749506
## Leaf_area -0.10873503 0.21608704 0.05655006 0.33231120
## Chlorophyll_content 0.12018037 -0.01247082 0.02255428 -0.09369632
## Aerial_fresh_weight -0.26351706 0.68727077 -0.27316223 0.10132413
## Aerial_dry_weight -0.07214602 0.12124570 0.74137362 -0.50267692
## Root_length 0.08617403 -0.28072317 0.35383667 0.52643216
## Roots_fresh_weight 0.27132068 -0.14341942 -0.21909866 -0.15424642
## Roots_dry_weight 0.09224390 -0.07332690 -0.03120358 -0.11337469
## PC9 PC10 PC11 PC12
## Soil_humidity -0.265854675 0.256000023 -0.021742611 0.03570246
## Electrical_conductivity 0.544832993 -0.608963020 0.141407519 -0.13048759
## Plant_height -0.211222723 0.177138574 0.426567659 -0.32794203
## Leaf_number -0.007399102 -0.001980915 -0.066542393 -0.05415789
## Leaf_length -0.023428358 -0.157837279 -0.727494654 -0.43357987
## Leaf_width 0.043504934 -0.215075601 -0.081496233 0.75990672
## Leaf_area 0.612827517 0.447294277 0.086226340 -0.12079011
## Chlorophyll_content -0.011741586 0.112225276 0.160531959 0.03938422
## Aerial_fresh_weight -0.410458809 -0.251217825 0.010582180 0.02922203
## Aerial_dry_weight -0.065107103 -0.045205220 -0.212166820 -0.10698135
## Root_length -0.175927507 -0.407510329 0.405559238 -0.27149911
## Roots_fresh_weight -0.033343533 0.120409932 -0.120142968 0.05697080
## Roots_dry_weight -0.069956207 0.051885440 -0.002888916 -0.01928205
## PC13
## Soil_humidity -0.007103540
## Electrical_conductivity 0.021300917
## Plant_height -0.084793094
## Leaf_number -0.002703077
## Leaf_length 0.013467758
## Leaf_width 0.001399967
## Leaf_area 0.008221652
## Chlorophyll_content 0.004457611
## Aerial_fresh_weight -0.012514819
## Aerial_dry_weight -0.081265406
## Root_length -0.037644747
## Roots_fresh_weight -0.350823081
## Roots_dry_weight 0.927778627
now we will plot the PCA results
# Plotting the PCA results
# install.packages("factoextra")
#if(!require(devtools)) install.packages("devtools")
#devtools::install_github("kassambara/factoextra")
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(ggplot2)
fviz_eig(PCA)
# graph for individuals
fviz_pca_ind(PCA,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
## Warning: ggrepel: 32 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
<<<<<<< HEAD
# graph of variable
fviz_pca_var(PCA,
col.var = "contrib", # Color by contributions to the PC
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE # Avoid text overlapping
)
<<<<<<< HEAD